From 0bd6fdd11acc364d17b690773b714211994e330c Mon Sep 17 00:00:00 2001
From: Andrew Or <andrewor14@gmail.com>
Date: Wed, 11 Jun 2014 10:53:54 -0700
Subject: [PATCH 1/2] Fix 3 pyspark tests not being invoked

---
 bin/pyspark      | 20 +++++++++++++-------
 python/run-tests |  3 ++-
 2 files changed, 15 insertions(+), 8 deletions(-)

diff --git a/bin/pyspark b/bin/pyspark
index 114cbbc3a8a8e..0b5ed40e2157d 100755
--- a/bin/pyspark
+++ b/bin/pyspark
@@ -45,7 +45,7 @@ fi
 . $FWDIR/bin/load-spark-env.sh
 
 # Figure out which Python executable to use
-if [ -z "$PYSPARK_PYTHON" ] ; then
+if [[ -z "$PYSPARK_PYTHON" ]]; then
   PYSPARK_PYTHON="python"
 fi
 export PYSPARK_PYTHON
@@ -59,7 +59,7 @@ export OLD_PYTHONSTARTUP=$PYTHONSTARTUP
 export PYTHONSTARTUP=$FWDIR/python/pyspark/shell.py
 
 # If IPython options are specified, assume user wants to run IPython
-if [ -n "$IPYTHON_OPTS" ]; then
+if [[ -n "$IPYTHON_OPTS" ]]; then
   IPYTHON=1
 fi
 
@@ -76,6 +76,16 @@ for i in "$@"; do
 done
 export PYSPARK_SUBMIT_ARGS
 
+# For pyspark tests: run $1 directly, through doctest if PYSPARK_DOC_TEST is set
+if [[ -n "$SPARK_TESTING" ]]; then
+  if [[ -n "$PYSPARK_DOC_TEST" ]]; then
+    exec "$PYSPARK_PYTHON" -m doctest $1
+  else
+    exec "$PYSPARK_PYTHON" $1
+  fi
+  exit
+fi
+
 # If a python file is provided, directly run spark-submit.
 if [[ "$1" =~ \.py$ ]]; then
   echo -e "\nWARNING: Running python applications through ./bin/pyspark is deprecated as of Spark 1.0." 1>&2
@@ -86,10 +96,6 @@ else
   if [[ "$IPYTHON" = "1" ]]; then
     exec ipython $IPYTHON_OPTS
   else
-    if [[ -n $SPARK_TESTING ]]; then
-      exec "$PYSPARK_PYTHON" -m doctest
-    else
-      exec "$PYSPARK_PYTHON"
-    fi
+    exec "$PYSPARK_PYTHON"
   fi
 fi
diff --git a/python/run-tests b/python/run-tests
index 3b4501178c89f..55db67bb76029 100755
--- a/python/run-tests
+++ b/python/run-tests
@@ -44,7 +44,6 @@ function run_test() {
         echo -en "\033[0m"  # No color
         exit -1
     fi
-
 }
 
 echo "Running PySpark tests. Output is in python/unit-tests.log."
@@ -55,9 +54,11 @@ run_test "pyspark/conf.py"
 if [ -n "$_RUN_SQL_TESTS" ]; then
   run_test "pyspark/sql.py"
 fi
+export PYSPARK_DOC_TEST=1
 run_test "pyspark/broadcast.py"
 run_test "pyspark/accumulators.py"
 run_test "pyspark/serializers.py"
+unset PYSPARK_DOC_TEST
 run_test "pyspark/tests.py"
 run_test "pyspark/mllib/_common.py"
 run_test "pyspark/mllib/classification.py"

From d2e540127eccbf2d3d2ca0919faf12e98408a29e Mon Sep 17 00:00:00 2001
From: Andrew Or <andrewor14@gmail.com>
Date: Wed, 11 Jun 2014 11:23:36 -0700
Subject: [PATCH 2/2] Explain why these tests are handled differently

---
 python/run-tests | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/python/run-tests b/python/run-tests
index 55db67bb76029..9282aa47e8375 100755
--- a/python/run-tests
+++ b/python/run-tests
@@ -54,6 +54,8 @@ run_test "pyspark/conf.py"
 if [ -n "$_RUN_SQL_TESTS" ]; then
   run_test "pyspark/sql.py"
 fi
+# These tests are included in the module-level docs, and so must be run
+# through the doctest module (-m doctest), not from within the Python file.
 export PYSPARK_DOC_TEST=1
 run_test "pyspark/broadcast.py"
 run_test "pyspark/accumulators.py"