diff --git a/python/pyspark/__init__.py b/python/pyspark/__init__.py
index 73fe7378ffa63..9d4ece307f98d 100644
--- a/python/pyspark/__init__.py
+++ b/python/pyspark/__init__.py
@@ -53,8 +53,31 @@
 import sys
 import os
-sys.path.insert(0, os.path.join(os.environ["SPARK_HOME"], "python/lib/py4j-0.8.1-src.zip"))
+import xml.etree.ElementTree as ET
+
+# Fail fast with a clear error instead of a bare KeyError further down.
+if "SPARK_HOME" not in os.environ:
+    raise ImportError("Environment variable SPARK_HOME is undefined.")
+
+spark_home = os.environ["SPARK_HOME"]
+pom_xml_file_path = os.path.join(spark_home, "pom.xml")
+try:
+    # Read the Spark version from the <version> element of the root POM.
+    # The POM uses the Maven namespace, so look it up by qualified tag
+    # rather than by positional index, which breaks if the POM is reordered.
+    tree = ET.parse(pom_xml_file_path)
+    root = tree.getroot()
+    version_tag = root.find("{http://maven.apache.org/POM/4.0.0}version").text
+    snapshot_version = version_tag[:5]
+except (IOError, ET.ParseError, AttributeError):
+    raise ImportError("Could not read the spark version, because pom.xml file is "
+                      "not found in SPARK_HOME(%s) directory." % spark_home)
+
+from pyspark.pyspark_version import __version__
+if snapshot_version != __version__:
+    raise ImportError("Incompatible version of Spark(%s) and PySpark(%s)."
+                      % (snapshot_version, __version__))
+
+sys.path.insert(0, os.path.join(os.environ["SPARK_HOME"], "python/lib/py4j-0.8.1-src.zip"))
 
 from pyspark.conf import SparkConf
 from pyspark.context import SparkContext
diff --git a/python/pyspark/pyspark_version.py b/python/pyspark/pyspark_version.py
new file mode 100644
index 0000000000000..a5675f60f8a7e
--- /dev/null
+++ b/python/pyspark/pyspark_version.py
@@ -0,0 +1 @@
+__version__='1.0.0'
diff --git a/python/setup.py b/python/setup.py
new file mode 100644
index 0000000000000..5f27bd2a49caa
--- /dev/null
+++ b/python/setup.py
@@ -0,0 +1,19 @@
+#!/usr/bin/env python
+
+from setuptools import setup
+
+exec(compile(open("pyspark/pyspark_version.py").read(),
+             "pyspark/pyspark_version.py", 'exec'))
+VERSION = __version__
+
+setup(name = 'pyspark',
+      version = VERSION,
+      description = 'Apache Spark Python API',
+      author = 'Prabin Banka',
+      author_email = 'prabin.banka@imaginea.com',
+      url = 'https://github.com/apache/spark/tree/master/python',
+      packages = ['pyspark', 'pyspark.mllib'],
+      data_files = [('pyspark', ['pyspark/pyspark_version.py'])],
+      install_requires = ['numpy>=1.7', 'py4j==0.8.1'],
+      license = 'http://www.apache.org/licenses/LICENSE-2.0',
+      )