Skip to content

Commit

Permalink
Prints out if Pandas and PyArrow are installed or not in PySpark SQL …
Browse files Browse the repository at this point in the history
…tests
  • Loading branch information
HyukjinKwon committed Feb 5, 2018
1 parent 6fb3fd1 commit fe2943e
Showing 1 changed file with 67 additions and 5 deletions.
72 changes: 67 additions & 5 deletions python/run-tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,15 +31,16 @@
import Queue
else:
import queue as Queue
from distutils.version import LooseVersion


# Append `SPARK_HOME/dev` to the Python path so that we can import the sparktestsupport module
sys.path.append(os.path.join(os.path.dirname(os.path.realpath(__file__)), "../dev/"))


from sparktestsupport import SPARK_HOME # noqa (suppress pep8 warnings)
from sparktestsupport.shellutils import which, subprocess_check_output, run_cmd # noqa
from sparktestsupport.modules import all_modules # noqa
from sparktestsupport.shellutils import which, subprocess_check_output # noqa
from sparktestsupport.modules import all_modules, pyspark_sql # noqa


python_modules = dict((m.name, m) for m in all_modules if m.python_test_goals if m.name != 'root')
Expand Down Expand Up @@ -151,6 +152,66 @@ def parse_opts():
return opts


def _check_dependencies(python_exec, modules_to_test):
if "COVERAGE_PROCESS_START" in os.environ:
# Make sure if coverage is installed.
try:
subprocess_check_output(
[python_exec, "-c", "import coverage"],
stderr=open(os.devnull, 'w'))
except:
print_red("Coverage is not installed in Python executable '%s' "
"but 'COVERAGE_PROCESS_START' environment variable is set, "
"exiting." % python_exec)
sys.exit(-1)

# If we should test 'pyspark-sql', it checks if PyArrow and Pandas are installed and
# explicitly prints out. See SPARK-23300.
if pyspark_sql in modules_to_test:
minimum_pyarrow_version = '0.8.0'
minimum_pandas_version = '0.19.2'

try:
pyarrow_version = subprocess_check_output(
[python_exec, "-c", "import pyarrow; print(pyarrow.__version__)"],
universal_newlines=True,
stderr=open(os.devnull, 'w')).strip()
if LooseVersion(pyarrow_version) >= LooseVersion(minimum_pyarrow_version):
LOGGER.info("Will test PyArrow related features against Python executable "
"'%s' in '%s' module." % (python_exec, pyspark_sql.name))
else:
LOGGER.warning(
"Will skip PyArrow related features against Python executable "
"'%s' in '%s' module. PyArrow >= %s is required; however, PyArrow "
"%s was found." % (
python_exec, pyspark_sql.name, minimum_pyarrow_version, pyarrow_version))
except:
LOGGER.warning(
"Will skip PyArrow related features against Python executable "
"'%s' in '%s' module. PyArrow >= %s is required; however, PyArrow "
"was not found." % (python_exec, pyspark_sql.name, minimum_pyarrow_version))

try:
pandas_version = subprocess_check_output(
[python_exec, "-c", "import pandas; print(pandas.__version__)"],
universal_newlines=True,
stderr=open(os.devnull, 'w')).strip()
if LooseVersion(pandas_version) >= LooseVersion(minimum_pandas_version):
LOGGER.info("Will test Pandas related features against Python executable "
"'%s' in '%s' module." % (python_exec, pyspark_sql.name))
else:
LOGGER.warning(
"Will skip Pandas related features against Python executable "
"'%s' in '%s' module. Pandas >= %s is required; however, Pandas "
"%s was found." % (
python_exec, pyspark_sql.name, minimum_pandas_version, pandas_version))
except:
LOGGER.warning(
"Will skip Pandas related features against Python executable "
"'%s' in '%s' module. Pandas >= %s is required; however, Pandas "
"was not found." % (python_exec, pyspark_sql.name, minimum_pandas_version))


def main():
opts = parse_opts()
if (opts.verbose):
Expand All @@ -175,9 +236,10 @@ def main():

task_queue = Queue.PriorityQueue()
for python_exec in python_execs:
if "COVERAGE_PROCESS_START" in os.environ:
# Make sure if coverage is installed.
run_cmd([python_exec, "-c", "import coverage"])
# Check if the python executable has proper dependencies installed to run tests
# for given modules properly.
_check_dependencies(python_exec, modules_to_test)

python_implementation = subprocess_check_output(
[python_exec, "-c", "import platform; print(platform.python_implementation())"],
universal_newlines=True).strip()
Expand Down

0 comments on commit fe2943e

Please sign in to comment.