From b0af198edb4c3c57ae97f14670777d770d79bef2 Mon Sep 17 00:00:00 2001
From: prabinb
Date: Thu, 17 Apr 2014 16:44:17 +0530
Subject: [PATCH] [SPARK-1267] Adding a pip installer setup file for PySpark.

Introduced a new file, pyspark/pyspark_version.py, for maintaining the
PySpark version. Changed pyspark/__init__.py to validate the SPARK_HOME
environment variable and to detect a PySpark/Spark version mismatch.

---
 python/pyspark/__init__.py        | 21 ++++++++++++++++++++-
 python/pyspark/pyspark_version.py |  1 +
 python/setup.py                   | 19 +++++++++++++++++++
 3 files changed, 40 insertions(+), 1 deletion(-)
 create mode 100644 python/pyspark/pyspark_version.py
 create mode 100644 python/setup.py

diff --git a/python/pyspark/__init__.py b/python/pyspark/__init__.py
index 73fe7378ffa63..9d4ece307f98d 100644
--- a/python/pyspark/__init__.py
+++ b/python/pyspark/__init__.py
@@ -53,8 +53,27 @@
 import sys
 import os
-sys.path.insert(0, os.path.join(os.environ["SPARK_HOME"], "python/lib/py4j-0.8.1-src.zip"))
+import xml.etree.ElementTree as ET
+
+if "SPARK_HOME" not in os.environ:
+    raise ImportError("Environment variable SPARK_HOME is undefined.")
+
+spark_home = os.environ["SPARK_HOME"]
+pom_xml_file_path = os.path.join(spark_home, "pom.xml")
+try:
+    tree = ET.parse(pom_xml_file_path)
+    root = tree.getroot()
+    version_tag = root[4].text
+    snapshot_version = version_tag[:5]
+except Exception:
+    raise ImportError("Could not read the Spark version from pom.xml in SPARK_HOME (%s)." % spark_home)
+
+from pyspark.pyspark_version import __version__
+if snapshot_version != __version__:
+    raise ImportError("Incompatible versions of Spark (%s) and PySpark (%s)." % (snapshot_version, __version__))
+
+sys.path.insert(0, os.path.join(os.environ["SPARK_HOME"], "python/lib/py4j-0.8.1-src.zip"))
 
 
 from pyspark.conf import SparkConf
 from pyspark.context import SparkContext
 
diff --git a/python/pyspark/pyspark_version.py b/python/pyspark/pyspark_version.py
new file mode 100644
index 0000000000000..a5675f60f8a7e
--- /dev/null
+++ b/python/pyspark/pyspark_version.py
@@ -0,0 +1 @@
+__version__ = '1.0.0'
diff --git a/python/setup.py b/python/setup.py
new file mode 100644
index 0000000000000..5f27bd2a49caa
--- /dev/null
+++ b/python/setup.py
@@ -0,0 +1,19 @@
+#!/usr/bin/env python
+
+from setuptools import setup
+
+exec(compile(open("pyspark/pyspark_version.py").read(),
+             "pyspark/pyspark_version.py", 'exec'))
+VERSION = __version__
+
+setup(name='pyspark',
+      version=VERSION,
+      description='Apache Spark Python API',
+      author='Prabin Banka',
+      author_email='prabin.banka@imaginea.com',
+      url='https://github.com/apache/spark/tree/master/python',
+      packages=['pyspark', 'pyspark.mllib'],
+      data_files=[('pyspark', ['pyspark/pyspark_version.py'])],
+      install_requires=['numpy>=1.7', 'py4j==0.8.1'],
+      license='http://www.apache.org/licenses/LICENSE-2.0',
+      )
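
Note on the import-time check added to pyspark/__init__.py: the patch locates the Spark version by position (root[4]) in pom.xml. Below is a minimal standalone sketch of the same cross-check that instead looks up the <version> element by tag, assuming the standard Maven POM XML namespace; the helper name spark_version_from_pom and the hard-coded '1.0.0' are illustrative only and are not part of the patch.

import os
import xml.etree.ElementTree as ET

# Standard Maven POM namespace (assumed to be the default namespace of Spark's pom.xml).
POM_NS = "{http://maven.apache.org/POM/4.0.0}"

def spark_version_from_pom(spark_home):
    """Return the top-level <version> text from SPARK_HOME/pom.xml, e.g. '1.0.0-SNAPSHOT'."""
    pom_path = os.path.join(spark_home, "pom.xml")
    root = ET.parse(pom_path).getroot()
    # Look up <version> by tag instead of by position (root[4]).
    version = root.find(POM_NS + "version")
    if version is None:
        raise ImportError("No <version> element found in %s." % pom_path)
    return version.text

if __name__ == "__main__":
    spark_home = os.environ["SPARK_HOME"]
    pyspark_version = "1.0.0"  # stands in for pyspark.pyspark_version.__version__
    spark_version = spark_version_from_pom(spark_home)[:5]  # drop a '-SNAPSHOT' suffix, as the patch does
    if spark_version != pyspark_version:
        raise ImportError("Incompatible versions of Spark (%s) and PySpark (%s)."
                          % (spark_version, pyspark_version))

With python/setup.py in place, the package could be installed from the python/ directory with "pip install ." (or "python setup.py install"), after which the cross-check added by this patch runs on the first "import pyspark".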