Fix PySpark tests by setting SPARK_DIST_CLASSPATH
JoshRosen committed Feb 17, 2016
1 parent 31854eb commit 906d8c8
Showing 3 changed files with 45 additions and 7 deletions.
4 changes: 2 additions & 2 deletions dev/mima
@@ -43,8 +43,8 @@ generate_mima_ignore() {
     org.apache.spark.tools.GenerateMIMAIgnore
 }
 
-generate_mima_ignore "$(build/sbt "export assembly/fullClasspath" | tail -n1)"
-generate_mima_ignore "$(build/sbt "export oldDeps/fullClasspath" | tail -n1)"
+generate_mima_ignore "$(build/sbt "export assembly/managedClasspath" | tail -n1)"
+generate_mima_ignore "$(build/sbt "export oldDeps/managedClasspath" | tail -n1)"
 
 echo -e "q\n" | build/sbt mima-report-binary-issues | grep -v -e "info.*Resolving"
 ret_val=$?
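The `build/sbt "export ..."` invocation prints sbt's own log lines before the resolved classpath, which is why both this script and the Python change below keep only the last line of output. A minimal Python sketch of that capture pattern (not part of this commit; the helper name is hypothetical):

import os
import subprocess

def sbt_export_classpath(task, spark_home="."):
    # sbt prints log lines first; the resolved classpath is the last line.
    out = subprocess.check_output(
        ["./build/sbt", "export %s" % task],
        cwd=spark_home, universal_newlines=True)
    return out.strip().split("\n")[-1]

# e.g. entries = sbt_export_classpath("assembly/managedClasspath").split(os.pathsep)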
@@ -146,9 +146,27 @@ List<String> buildClassPath(String appClassPath) throws IOException {
     boolean isTesting = "1".equals(getenv("SPARK_TESTING"));
     if (prependClasses || isTesting) {
       String scala = getScalaVersion();
-      List<String> projects = Arrays.asList("core", "repl", "mllib", "graphx",
-        "streaming", "tools", "sql/catalyst", "sql/core", "sql/hive", "sql/hive-thriftserver",
-        "yarn", "launcher", "network/common", "network/shuffle", "network/yarn");
+      // All projects _except_ assembly and the external/ projects
+      List<String> projects = Arrays.asList(
+        "common/sketch",
+        "core",
+        "docker-integration-tests",
+        "examples",
+        "graphx",
+        "launcher",
+        "mllib",
+        "network/common",
+        "network/shuffle",
+        "repl",
+        "sql/catalyst",
+        "sql/core",
+        "sql/hive",
+        "sql/hive-thriftserver",
+        "streaming",
+        "tags",
+        "tools",
+        "unsafe"
+      );
       if (prependClasses) {
         if (!isTesting) {
           System.err.println(
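With SPARK_PREPEND_CLASSES or SPARK_TESTING set, the launcher prepends each listed project's locally compiled classes to the application classpath, so freshly built classes take precedence over whatever is on the distribution classpath. A rough Python sketch of the per-project directory pattern involved; the target/scala-&lt;version&gt;/classes layout is an assumption based on Spark's sbt build, and the real logic lives in the Java change above:

import os

def prepend_classes_dirs(spark_home, projects, scala_version="2.11"):
    # Approximate the per-project classes directories the launcher prepends.
    # The target/scala-<version>/classes layout is an assumption here.
    return [
        os.path.join(spark_home, project, "target",
                     "scala-" + scala_version, "classes")
        for project in projects
    ]

# e.g. prepend_classes_dirs("/path/to/spark", ["core", "launcher"])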
24 changes: 22 additions & 2 deletions python/run-tests.py
@@ -54,10 +54,30 @@ def print_red(text):
 LOGGER = logging.getLogger()
 
 
+def get_spark_dist_classpath():
+    original_working_dir = os.getcwd()
+    os.chdir(SPARK_HOME)
+    cp = subprocess_check_output(
+        ["./build/sbt", "export assembly/managedClasspath"], universal_newlines=True)
+    cp = cp.strip().split("\n")[-1]
+    os.chdir(original_working_dir)
+    return cp
+
+
+SPARK_DIST_CLASSPATH = get_spark_dist_classpath()
+
+
 def run_individual_python_test(test_name, pyspark_python):
     env = dict(os.environ)
-    env.update({'SPARK_TESTING': '1', 'PYSPARK_PYTHON': which(pyspark_python),
-                'PYSPARK_DRIVER_PYTHON': which(pyspark_python)})
+    env.update({
+        # Setting SPARK_DIST_CLASSPATH is a simple way to make sure that any child processes
+        # launched by the tests have access to the correct test-time classpath.
+        'SPARK_DIST_CLASSPATH': SPARK_DIST_CLASSPATH,
+        'SPARK_TESTING': '1',
+        'SPARK_PREPEND_CLASSES': '1',
+        'PYSPARK_PYTHON': which(pyspark_python),
+        'PYSPARK_DRIVER_PYTHON': which(pyspark_python),
+    })
     LOGGER.debug("Starting test(%s): %s", pyspark_python, test_name)
     start_time = time.time()
     try:
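Because env is built as a copy of os.environ with the test-time variables layered on top, every child process the test runner spawns inherits SPARK_DIST_CLASSPATH without the runner's own environment being mutated. A small self-contained sketch of that pattern (values are placeholders, not from this commit):

import os
import subprocess

env = dict(os.environ)  # copy, so the parent environment stays untouched
env.update({
    'SPARK_DIST_CLASSPATH': '/tmp/example.jar',  # placeholder value
    'SPARK_TESTING': '1',
})

# Any child launched with env=env sees the variables added above.
subprocess.check_call(
    ["python", "-c", "import os; print(os.environ['SPARK_TESTING'])"],
    env=env)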
