From bb5de1f2ef66a4775c8d8bc4f632535d45b3f0b4 Mon Sep 17 00:00:00 2001 From: hyukjinkwon Date: Mon, 24 Apr 2017 10:48:06 +0900 Subject: [PATCH 1/6] Fill up documentations for functions in Column API in PySpark --- python/pyspark/sql/column.py | 101 ++++++++++++++---- .../expressions/bitwiseExpressions.scala | 2 +- .../scala/org/apache/spark/sql/Column.scala | 19 ++-- 3 files changed, 90 insertions(+), 32 deletions(-) diff --git a/python/pyspark/sql/column.py b/python/pyspark/sql/column.py index 46c1707cb6c37..88a9d4097fd81 100644 --- a/python/pyspark/sql/column.py +++ b/python/pyspark/sql/column.py @@ -185,9 +185,44 @@ def __contains__(self, item): "in a string column or 'array_contains' function for an array column.") # bitwise operators - bitwiseOR = _bin_op("bitwiseOR") - bitwiseAND = _bin_op("bitwiseAND") - bitwiseXOR = _bin_op("bitwiseXOR") + _bitwiseOR_doc = """ + Compute bitwise OR of this expression with another expression. + + :param other: a value or :class:`Column` to calculate bitwise or(|) against + this :class:`Column`. + + >>> from pyspark.sql import Row + >>> df3 = spark.createDataFrame([Row(a=170, b=75)]) + >>> df3.select(df3.a.bitwiseOR(df3.b)).collect() + [Row((a | b)=235)] + """ + + _bitwiseAND_doc = """ + Compute bitwise AND of this expression with another expression. + + :param other: a value or :class:`Column` to calculate bitwise and(&) against + this :class:`Column`. + + >>> from pyspark.sql import Row + >>> df3 = spark.createDataFrame([Row(a=170, b=75)]) + >>> df3.select(df3.a.bitwiseAND(df3.b)).collect() + [Row((a & b)=10)] + """ + + _bitwiseXOR_doc = """ + Compute bitwise XOR of this expression with another expression. + + :param other: a value or :class:`Column` to calculate bitwise xor(^) against + this :class:`Column`. + + >>> from pyspark.sql import Row + >>> df3 = spark.createDataFrame([Row(a=170, b=75)]) + >>> df3.select(df3.a.bitwiseXOR(df3.b)).collect() + [Row((a ^ b)=225)] + """ + bitwiseOR = _bin_op("bitwiseOR", _bitwiseOR_doc) + bitwiseAND = _bin_op("bitwiseAND", _bitwiseAND_doc) + bitwiseXOR = _bin_op("bitwiseXOR", _bitwiseXOR_doc) @since(1.3) def getItem(self, key): @@ -195,7 +230,7 @@ def getItem(self, key): An expression that gets an item at position ``ordinal`` out of a list, or gets an item by key out of a dict. - >>> df = sc.parallelize([([1, 2], {"key": "value"})]).toDF(["l", "d"]) + >>> df = spark.createDataFrame([([1, 2], {"key": "value"})], ["l", "d"]) >>> df.select(df.l.getItem(0), df.d.getItem("key")).show() +----+------+ |l[0]|d[key]| @@ -217,7 +252,7 @@ def getField(self, name): An expression that gets a field by name in a StructField. >>> from pyspark.sql import Row - >>> df = sc.parallelize([Row(r=Row(a=1, b="b"))]).toDF() + >>> df = spark.createDataFrame([Row(r=Row(a=1, b="b"))]) >>> df.select(df.r.getField("b")).show() +---+ |r.b| @@ -251,7 +286,8 @@ def __iter__(self): # string methods _rlike_doc = """ - Return a Boolean :class:`Column` based on a regex match. + SQL RLIKE expression (LIKE with Regex). Returns a boolean :class:`Column` based on a regex + match. :param other: an extended regex expression @@ -259,7 +295,7 @@ def __iter__(self): [Row(age=2, name=u'Alice')] """ _like_doc = """ - Return a Boolean :class:`Column` based on a SQL LIKE match. + SQL like expression. Returns a boolean :class:`Column` based on a SQL LIKE match. :param other: a SQL LIKE pattern @@ -269,9 +305,9 @@ def __iter__(self): [Row(age=2, name=u'Alice')] """ _startswith_doc = """ - Return a Boolean :class:`Column` based on a string match. 
+ String starts with. Returns a boolean :class:`Column` based on a string match. - :param other: string at end of line (do not use a regex `^`) + :param other: string at start of line (do not use a regex `^`) >>> df.filter(df.name.startswith('Al')).collect() [Row(age=2, name=u'Alice')] @@ -279,7 +315,7 @@ def __iter__(self): [] """ _endswith_doc = """ - Return a Boolean :class:`Column` based on matching end of string. + String ends with. Returns a boolean :class:`Column` based on a string match. :param other: string at end of line (do not use a regex `$`) @@ -288,8 +324,16 @@ def __iter__(self): >>> df.filter(df.name.endswith('ice$')).collect() [] """ + _contains_doc = """ + Contains the other element. Returns a boolean :class:`Column` based on a string match. + + :param other: string in line + + >>> df.filter(df.name.contains('o')).collect() + [Row(age=5, name=u'Bob')] + """ - contains = _bin_op("contains") + contains = ignore_unicode_prefix(_bin_op("contains", _contains_doc)) rlike = ignore_unicode_prefix(_bin_op("rlike", _rlike_doc)) like = ignore_unicode_prefix(_bin_op("like", _like_doc)) startswith = ignore_unicode_prefix(_bin_op("startsWith", _startswith_doc)) @@ -337,26 +381,39 @@ def isin(self, *cols): return Column(jc) # order - asc = _unary_op("asc", "Returns a sort expression based on the" - " ascending order of the given column name.") - desc = _unary_op("desc", "Returns a sort expression based on the" - " descending order of the given column name.") + _asc_doc = """ + Returns an ascending ordering used in sorting. + + >>> from pyspark.sql import Row + >>> df2 = spark.createDataFrame([Row(name=u'Tom', height=80), Row(name=u'Alice', height=None)]) + >>> df2.select(df2.name).orderBy(df2.name.asc()).collect() + [Row(name=u'Alice'), Row(name=u'Tom')] + """ + _desc_doc = """ + Returns a descending ordering used in sorting. + + >>> from pyspark.sql import Row + >>> df2 = spark.createDataFrame([Row(name=u'Tom', height=80), Row(name=u'Alice', height=None)]) + >>> df2.select(df2.name).orderBy(df2.name.desc()).collect() + [Row(name=u'Tom'), Row(name=u'Alice')] + """ + + asc = ignore_unicode_prefix(_unary_op("asc", _asc_doc)) + desc = ignore_unicode_prefix(_unary_op("desc", _desc_doc)) _isNull_doc = """ - True if the current expression is null. Often combined with - :func:`DataFrame.filter` to select rows with null values. + True if the current expression is null. >>> from pyspark.sql import Row - >>> df2 = sc.parallelize([Row(name=u'Tom', height=80), Row(name=u'Alice', height=None)]).toDF() + >>> df2 = spark.createDataFrame([Row(name=u'Tom', height=80), Row(name=u'Alice', height=None)]) >>> df2.filter(df2.height.isNull()).collect() [Row(height=None, name=u'Alice')] """ _isNotNull_doc = """ - True if the current expression is null. Often combined with - :func:`DataFrame.filter` to select rows with non-null values. + True if the current expression is NOT null. 
>>> from pyspark.sql import Row - >>> df2 = sc.parallelize([Row(name=u'Tom', height=80), Row(name=u'Alice', height=None)]).toDF() + >>> df2 = spark.createDataFrame([Row(name=u'Tom', height=80), Row(name=u'Alice', height=None)]) >>> df2.filter(df2.height.isNotNull()).collect() [Row(height=80, name=u'Tom')] """ @@ -527,7 +584,7 @@ def _test(): .appName("sql.column tests")\ .getOrCreate() sc = spark.sparkContext - globs['sc'] = sc + globs['spark'] = spark globs['df'] = sc.parallelize([(2, 'Alice'), (5, 'Bob')]) \ .toDF(StructType([StructField('age', IntegerType()), StructField('name', StringType())])) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/bitwiseExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/bitwiseExpressions.scala index 2918040771433..425efbb6c96c4 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/bitwiseExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/bitwiseExpressions.scala @@ -86,7 +86,7 @@ case class BitwiseOr(left: Expression, right: Expression) extends BinaryArithmet } /** - * A function that calculates bitwise xor of two numbers. + * A function that calculates bitwise xor({@literal ^}) of two numbers. * * Code generation inherited from BinaryArithmetic. */ diff --git a/sql/core/src/main/scala/org/apache/spark/sql/Column.scala b/sql/core/src/main/scala/org/apache/spark/sql/Column.scala index 43de2de7e7094..0d72ce631a086 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/Column.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/Column.scala @@ -779,7 +779,7 @@ class Column(val expr: Expression) extends Logging { def isin(list: Any*): Column = withExpr { In(expr, list.map(lit(_).expr)) } /** - * SQL like expression. + * SQL like expression. Returns a boolean column based on a SQL LIKE match. * * @group expr_ops * @since 1.3.0 @@ -787,7 +787,8 @@ class Column(val expr: Expression) extends Logging { def like(literal: String): Column = withExpr { Like(expr, lit(literal).expr) } /** - * SQL RLIKE expression (LIKE with Regex). + * SQL RLIKE expression (LIKE with Regex). Returns a boolean column based on a regex + * match. * * @group expr_ops * @since 1.3.0 @@ -838,7 +839,7 @@ class Column(val expr: Expression) extends Logging { } /** - * Contains the other element. + * Contains the other element. Returns a boolean column based on a string match. * * @group expr_ops * @since 1.3.0 @@ -846,7 +847,7 @@ class Column(val expr: Expression) extends Logging { def contains(other: Any): Column = withExpr { Contains(expr, lit(other).expr) } /** - * String starts with. + * String starts with. Returns a boolean column based on a string match. * * @group expr_ops * @since 1.3.0 @@ -854,7 +855,7 @@ class Column(val expr: Expression) extends Logging { def startsWith(other: Column): Column = withExpr { StartsWith(expr, lit(other).expr) } /** - * String starts with another string literal. + * String starts with another string literal. Returns a boolean column based on a string match. * * @group expr_ops * @since 1.3.0 @@ -862,7 +863,7 @@ class Column(val expr: Expression) extends Logging { def startsWith(literal: String): Column = this.startsWith(lit(literal)) /** - * String ends with. + * String ends with. Returns a boolean column based on a string match. 
* * @group expr_ops * @since 1.3.0 @@ -870,7 +871,7 @@ class Column(val expr: Expression) extends Logging { def endsWith(other: Column): Column = withExpr { EndsWith(expr, lit(other).expr) } /** - * String ends with another string literal. + * String ends with another string literal. Returns a boolean column based on a string match. * * @group expr_ops * @since 1.3.0 @@ -1008,7 +1009,7 @@ class Column(val expr: Expression) extends Logging { def cast(to: String): Column = cast(CatalystSqlParser.parseDataType(to)) /** - * Returns an ordering used in sorting. + * Returns a descending ordering used in sorting. * {{{ * // Scala * df.sort(df("age").desc) @@ -1083,7 +1084,7 @@ class Column(val expr: Expression) extends Logging { def asc_nulls_first: Column = withExpr { SortOrder(expr, Ascending, NullsFirst, Set.empty) } /** - * Returns an ordering used in sorting, where null values appear after non-null values. + * Returns an ascending ordering used in sorting, where null values appear after non-null values. * {{{ * // Scala: sort a DataFrame by age column in ascending order and null values appearing last. * df.sort(df("age").asc_nulls_last) From 92347de5dd544482e2afb9b69f078727224652bd Mon Sep 17 00:00:00 2001 From: hyukjinkwon Date: Mon, 24 Apr 2017 12:51:58 +0900 Subject: [PATCH 2/6] Match asc/desc in functions.scala, Column.scala, functions.py and column.py --- python/pyspark/sql/column.py | 4 ++-- .../main/scala/org/apache/spark/sql/Column.scala | 16 ++++++++++------ 2 files changed, 12 insertions(+), 8 deletions(-) diff --git a/python/pyspark/sql/column.py index 88a9d4097fd81..254361f3345de 100644 --- a/python/pyspark/sql/column.py +++ b/python/pyspark/sql/column.py @@ -382,7 +382,7 @@ def isin(self, *cols): # order _asc_doc = """ - Returns an ascending ordering used in sorting. + Returns a sort expression based on the descending order of the given column name >>> from pyspark.sql import Row >>> df2 = spark.createDataFrame([Row(name=u'Tom', height=80), Row(name=u'Alice', height=None)]) >>> df2.select(df2.name).orderBy(df2.name.asc()).collect() [Row(name=u'Alice'), Row(name=u'Tom')] """ _desc_doc = """ - Returns a descending ordering used in sorting. + Returns a sort expression based on the descending order of the given column name. >>> from pyspark.sql import Row >>> df2 = spark.createDataFrame([Row(name=u'Tom', height=80), Row(name=u'Alice', height=None)]) >>> df2.select(df2.name).orderBy(df2.name.desc()).collect() [Row(name=u'Tom'), Row(name=u'Alice')] """ diff --git a/sql/core/src/main/scala/org/apache/spark/sql/Column.scala index 0d72ce631a086..b23ab1fa3514a 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/Column.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/Column.scala @@ -1009,7 +1009,7 @@ class Column(val expr: Expression) extends Logging { def cast(to: String): Column = cast(CatalystSqlParser.parseDataType(to)) /** - * Returns a descending ordering used in sorting. + * Returns a sort expression based on the descending order of the column. * {{{ * // Scala * df.sort(df("age").desc) @@ -1024,7 +1024,8 @@ class Column(val expr: Expression) extends Logging { def desc: Column = withExpr { SortOrder(expr, Descending) } /** - * Returns a descending ordering used in sorting, where null values appear before non-null values. + * Returns a sort expression based on the descending order of the column, + * and null values appear before non-null values. * {{{ * // Scala: sort a DataFrame by age column in descending order and null values appearing first.
* df.sort(df("age").desc_nulls_first) @@ -1039,7 +1040,8 @@ class Column(val expr: Expression) extends Logging { def desc_nulls_first: Column = withExpr { SortOrder(expr, Descending, NullsFirst, Set.empty) } /** - * Returns a descending ordering used in sorting, where null values appear after non-null values. + * Returns a sort expression based on the descending order of the column, + * and null values appear after non-null values. * {{{ * // Scala: sort a DataFrame by age column in descending order and null values appearing last. * df.sort(df("age").desc_nulls_last) @@ -1054,7 +1056,7 @@ class Column(val expr: Expression) extends Logging { def desc_nulls_last: Column = withExpr { SortOrder(expr, Descending, NullsLast, Set.empty) } /** - * Returns an ascending ordering used in sorting. + * Returns a sort expression based on ascending order of the column. * {{{ * // Scala: sort a DataFrame by age column in ascending order. * df.sort(df("age").asc) @@ -1069,7 +1071,8 @@ class Column(val expr: Expression) extends Logging { def asc: Column = withExpr { SortOrder(expr, Ascending) } /** - * Returns an ascending ordering used in sorting, where null values appear before non-null values. + * Returns a sort expression based on ascending order of the column, + * and null values return before non-null values. * {{{ * // Scala: sort a DataFrame by age column in ascending order and null values appearing first. * df.sort(df("age").asc_nulls_last) @@ -1084,7 +1087,8 @@ class Column(val expr: Expression) extends Logging { def asc_nulls_first: Column = withExpr { SortOrder(expr, Ascending, NullsFirst, Set.empty) } /** - * Returns an ascending ordering used in sorting, where null values appear after non-null values. + * Returns a sort expression based on ascending order of the column, + * and null values appear after non-null values. * {{{ * // Scala: sort a DataFrame by age column in ascending order and null values appearing last. * df.sort(df("age").asc_nulls_last) From af8ac74b624d54b16339083319e33e8af098655e Mon Sep 17 00:00:00 2001 From: hyukjinkwon Date: Mon, 24 Apr 2017 12:52:59 +0900 Subject: [PATCH 3/6] Match functions.scala, Column.scala, functions.py and column.py --- python/pyspark/sql/column.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/pyspark/sql/column.py b/python/pyspark/sql/column.py index 254361f3345de..02867aa57129e 100644 --- a/python/pyspark/sql/column.py +++ b/python/pyspark/sql/column.py @@ -382,7 +382,7 @@ def isin(self, *cols): # order _asc_doc = """ - Returns a sort expression based on the descending order of the given column name + Returns a sort expression based on the ascending order of the given column name >>> from pyspark.sql import Row >>> df2 = spark.createDataFrame([Row(name=u'Tom', height=80), Row(name=u'Alice', height=None)]) From 2815ff167b0ce9f6e0d2d6ae9f3d4fb0f3ce94d2 Mon Sep 17 00:00:00 2001 From: hyukjinkwon Date: Mon, 24 Apr 2017 13:16:56 +0900 Subject: [PATCH 4/6] Consistent newlines --- python/pyspark/sql/column.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/python/pyspark/sql/column.py b/python/pyspark/sql/column.py index 02867aa57129e..3d5cd7ed84b92 100644 --- a/python/pyspark/sql/column.py +++ b/python/pyspark/sql/column.py @@ -196,7 +196,6 @@ def __contains__(self, item): >>> df3.select(df3.a.bitwiseOR(df3.b)).collect() [Row((a | b)=235)] """ - _bitwiseAND_doc = """ Compute bitwise AND of this expression with another expression. 
@@ -208,7 +207,6 @@ def __contains__(self, item): >>> df3.select(df3.a.bitwiseAND(df3.b)).collect() [Row((a & b)=10)] """ - _bitwiseXOR_doc = """ Compute bitwise XOR of this expression with another expression. @@ -220,6 +218,7 @@ def __contains__(self, item): >>> df3.select(df3.a.bitwiseXOR(df3.b)).collect() [Row((a ^ b)=225)] """ + bitwiseOR = _bin_op("bitwiseOR", _bitwiseOR_doc) bitwiseAND = _bin_op("bitwiseAND", _bitwiseAND_doc) bitwiseXOR = _bin_op("bitwiseXOR", _bitwiseXOR_doc) From eaeb4564562272ae021fa1a7a8a083ccc56e5c33 Mon Sep 17 00:00:00 2001 From: hyukjinkwon Date: Tue, 25 Apr 2017 14:18:31 +0900 Subject: [PATCH 5/6] Move _contains_doc up --- python/pyspark/sql/column.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/python/pyspark/sql/column.py b/python/pyspark/sql/column.py index 3d5cd7ed84b92..58f4caf358fef 100644 --- a/python/pyspark/sql/column.py +++ b/python/pyspark/sql/column.py @@ -284,6 +284,14 @@ def __iter__(self): raise TypeError("Column is not iterable") # string methods + _contains_doc = """ + Contains the other element. Returns a boolean :class:`Column` based on a string match. + + :param other: string in line + + >>> df.filter(df.name.contains('o')).collect() + [Row(age=5, name=u'Bob')] + """ _rlike_doc = """ SQL RLIKE expression (LIKE with Regex). Returns a boolean :class:`Column` based on a regex match. @@ -323,14 +331,6 @@ def __iter__(self): >>> df.filter(df.name.endswith('ice$')).collect() [] """ - _contains_doc = """ - Contains the other element. Returns a boolean :class:`Column` based on a string match. - - :param other: string in line - - >>> df.filter(df.name.contains('o')).collect() - [Row(age=5, name=u'Bob')] - """ contains = ignore_unicode_prefix(_bin_op("contains", _contains_doc)) rlike = ignore_unicode_prefix(_bin_op("rlike", _rlike_doc)) From 0fd9e37051161bf7d54be2989163e101647f5d85 Mon Sep 17 00:00:00 2001 From: hyukjinkwon Date: Sat, 29 Apr 2017 18:54:12 +0900 Subject: [PATCH 6/6] df# to df --- python/pyspark/sql/column.py | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/python/pyspark/sql/column.py b/python/pyspark/sql/column.py index 58f4caf358fef..b8df37f25180f 100644 --- a/python/pyspark/sql/column.py +++ b/python/pyspark/sql/column.py @@ -192,8 +192,8 @@ def __contains__(self, item): this :class:`Column`. >>> from pyspark.sql import Row - >>> df3 = spark.createDataFrame([Row(a=170, b=75)]) - >>> df3.select(df3.a.bitwiseOR(df3.b)).collect() + >>> df = spark.createDataFrame([Row(a=170, b=75)]) + >>> df.select(df.a.bitwiseOR(df.b)).collect() [Row((a | b)=235)] """ _bitwiseAND_doc = """ @@ -203,8 +203,8 @@ def __contains__(self, item): this :class:`Column`. >>> from pyspark.sql import Row - >>> df3 = spark.createDataFrame([Row(a=170, b=75)]) - >>> df3.select(df3.a.bitwiseAND(df3.b)).collect() + >>> df = spark.createDataFrame([Row(a=170, b=75)]) + >>> df.select(df.a.bitwiseAND(df.b)).collect() [Row((a & b)=10)] """ _bitwiseXOR_doc = """ @@ -214,8 +214,8 @@ def __contains__(self, item): this :class:`Column`. 
>>> from pyspark.sql import Row - >>> df3 = spark.createDataFrame([Row(a=170, b=75)]) - >>> df3.select(df3.a.bitwiseXOR(df3.b)).collect() + >>> df = spark.createDataFrame([Row(a=170, b=75)]) + >>> df.select(df.a.bitwiseXOR(df.b)).collect() [Row((a ^ b)=225)] """ @@ -384,16 +384,16 @@ def isin(self, *cols): Returns a sort expression based on the ascending order of the given column name >>> from pyspark.sql import Row - >>> df2 = spark.createDataFrame([Row(name=u'Tom', height=80), Row(name=u'Alice', height=None)]) - >>> df2.select(df2.name).orderBy(df2.name.asc()).collect() + >>> df = spark.createDataFrame([Row(name=u'Tom', height=80), Row(name=u'Alice', height=None)]) + >>> df.select(df.name).orderBy(df.name.asc()).collect() [Row(name=u'Alice'), Row(name=u'Tom')] """ _desc_doc = """ Returns a sort expression based on the descending order of the given column name. >>> from pyspark.sql import Row - >>> df2 = spark.createDataFrame([Row(name=u'Tom', height=80), Row(name=u'Alice', height=None)]) - >>> df2.select(df2.name).orderBy(df2.name.desc()).collect() + >>> df = spark.createDataFrame([Row(name=u'Tom', height=80), Row(name=u'Alice', height=None)]) + >>> df.select(df.name).orderBy(df.name.desc()).collect() [Row(name=u'Tom'), Row(name=u'Alice')] """ @@ -404,16 +404,16 @@ def isin(self, *cols): True if the current expression is null. >>> from pyspark.sql import Row - >>> df2 = spark.createDataFrame([Row(name=u'Tom', height=80), Row(name=u'Alice', height=None)]) - >>> df2.filter(df2.height.isNull()).collect() + >>> df = spark.createDataFrame([Row(name=u'Tom', height=80), Row(name=u'Alice', height=None)]) + >>> df.filter(df.height.isNull()).collect() [Row(height=None, name=u'Alice')] """ _isNotNull_doc = """ True if the current expression is NOT null. >>> from pyspark.sql import Row - >>> df2 = spark.createDataFrame([Row(name=u'Tom', height=80), Row(name=u'Alice', height=None)]) - >>> df2.filter(df2.height.isNotNull()).collect() + >>> df = spark.createDataFrame([Row(name=u'Tom', height=80), Row(name=u'Alice', height=None)]) + >>> df.filter(df.height.isNotNull()).collect() [Row(height=80, name=u'Tom')] """
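The doctests added in this patch series only execute under the `_test()` harness wired up at the bottom of `python/pyspark/sql/column.py`. As a standalone sanity check of the documented behavior — a minimal sketch, not part of the patch, assuming a local PySpark installation with the 2.x-era API; the app name is illustrative — the new examples can be reproduced like this:

```python
# Minimal, standalone reproduction of the doctests added in this patch.
# Assumes a local PySpark (2.x-era API) install; the app name is illustrative.
from pyspark.sql import Row, SparkSession

spark = SparkSession.builder.appName("column-doc-sanity-check").getOrCreate()

# Bitwise operators: 170 = 0b10101010, 75 = 0b01001011.
df = spark.createDataFrame([Row(a=170, b=75)])
df.select(
    df.a.bitwiseOR(df.b),   # (a | b) = 235
    df.a.bitwiseAND(df.b),  # (a & b) = 10
    df.a.bitwiseXOR(df.b),  # (a ^ b) = 225
).show()

# Null checks and sort expressions.
people = spark.createDataFrame(
    [Row(name='Tom', height=80), Row(name='Alice', height=None)])
people.filter(people.height.isNull()).show()                   # Alice
people.filter(people.height.isNotNull()).show()                # Tom
people.select(people.name).orderBy(people.name.asc()).show()   # Alice, Tom
people.select(people.name).orderBy(people.name.desc()).show()  # Tom, Alice

# String matches: contains/startswith/endswith take plain literals,
# while rlike treats its argument as a regular expression.
names = spark.createDataFrame([(2, 'Alice'), (5, 'Bob')], ['age', 'name'])
names.filter(names.name.contains('o')).show()     # Bob
names.filter(names.name.startswith('Al')).show()  # Alice
names.filter(names.name.endswith('ice')).show()   # Alice
names.filter(names.name.rlike('ice$')).show()     # Alice

spark.stop()
```

Note that `startswith`/`endswith` match string literals while `rlike` interprets its argument as a regex — exactly the distinction the new `:param other:` lines in these docstrings call out.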