Skip to content

Commit

Permalink
[SPARK-13578][CORE] Modify launch scripts to not use assemblies.
Browse files Browse the repository at this point in the history
Instead of looking for a specially-named assembly, the scripts now will
blindly add all jars under the libs directory to the classpath. This
libs directory is still currently the old assembly dir, so things should
keep working the same way as before until we make more packaging changes.

The only lost feature is the detection of multiple assemblies; I consider
that a minor nicety that only really affects a few developers, so it's
probably ok.

Tested locally by running spark-shell; also did some minor Win32 testing
(just made sure spark-shell started).

Author: Marcelo Vanzin <[email protected]>

Closes apache#11591 from vanzin/SPARK-13578.
  • Loading branch information
Marcelo Vanzin authored and jeanlyn committed Mar 17, 2016
1 parent f987e0d commit 3f145e9
Show file tree
Hide file tree
Showing 5 changed files with 31 additions and 66 deletions.
29 changes: 7 additions & 22 deletions bin/spark-class
Original file line number Diff line number Diff line change
Expand Up @@ -35,42 +35,27 @@ else
fi
fi

# Find assembly jar
SPARK_ASSEMBLY_JAR=
# Find Spark jars.
# TODO: change the directory name when Spark jars move from "lib".
if [ -f "${SPARK_HOME}/RELEASE" ]; then
ASSEMBLY_DIR="${SPARK_HOME}/lib"
SPARK_JARS_DIR="${SPARK_HOME}/lib"
else
ASSEMBLY_DIR="${SPARK_HOME}/assembly/target/scala-$SPARK_SCALA_VERSION"
SPARK_JARS_DIR="${SPARK_HOME}/assembly/target/scala-$SPARK_SCALA_VERSION"
fi

GREP_OPTIONS=
num_jars="$(ls -1 "$ASSEMBLY_DIR" | grep "^spark-assembly.*hadoop.*\.jar$" | wc -l)"
if [ "$num_jars" -eq "0" -a -z "$SPARK_ASSEMBLY_JAR" -a "$SPARK_PREPEND_CLASSES" != "1" ]; then
echo "Failed to find Spark assembly in $ASSEMBLY_DIR." 1>&2
if [ ! -d "$SPARK_JARS_DIR" ]; then
echo "Failed to find Spark jars directory ($SPARK_JARS_DIR)." 1>&2
echo "You need to build Spark before running this program." 1>&2
exit 1
fi
if [ -d "$ASSEMBLY_DIR" ]; then
ASSEMBLY_JARS="$(ls -1 "$ASSEMBLY_DIR" | grep "^spark-assembly.*hadoop.*\.jar$" || true)"
if [ "$num_jars" -gt "1" ]; then
echo "Found multiple Spark assembly jars in $ASSEMBLY_DIR:" 1>&2
echo "$ASSEMBLY_JARS" 1>&2
echo "Please remove all but one jar." 1>&2
exit 1
fi
fi

SPARK_ASSEMBLY_JAR="${ASSEMBLY_DIR}/${ASSEMBLY_JARS}"

LAUNCH_CLASSPATH="$SPARK_ASSEMBLY_JAR"
LAUNCH_CLASSPATH="$SPARK_JARS_DIR/*"

# Add the launcher build dir to the classpath if requested.
if [ -n "$SPARK_PREPEND_CLASSES" ]; then
LAUNCH_CLASSPATH="${SPARK_HOME}/launcher/target/scala-$SPARK_SCALA_VERSION/classes:$LAUNCH_CLASSPATH"
fi

export _SPARK_ASSEMBLY="$SPARK_ASSEMBLY_JAR"

# For tests
if [[ -n "$SPARK_TESTING" ]]; then
unset YARN_CONF_DIR
Expand Down
18 changes: 6 additions & 12 deletions bin/spark-class2.cmd
Original file line number Diff line number Diff line change
Expand Up @@ -28,33 +28,27 @@ if "x%1"=="x" (
exit /b 1
)

rem Find assembly jar
set SPARK_ASSEMBLY_JAR=0

rem Find Spark jars.
rem TODO: change the directory name when Spark jars move from "lib".
if exist "%SPARK_HOME%\RELEASE" (
set ASSEMBLY_DIR="%SPARK_HOME%\lib"
set SPARK_JARS_DIR="%SPARK_HOME%\lib"
) else (
set ASSEMBLY_DIR="%SPARK_HOME%\assembly\target\scala-%SPARK_SCALA_VERSION%"
set SPARK_JARS_DIR="%SPARK_HOME%\assembly\target\scala-%SPARK_SCALA_VERSION%"
)

for %%d in (%ASSEMBLY_DIR%\spark-assembly*hadoop*.jar) do (
set SPARK_ASSEMBLY_JAR=%%d
)
if "%SPARK_ASSEMBLY_JAR%"=="0" (
if not exist "%SPARK_JARS_DIR%"\ (
echo Failed to find Spark assembly JAR.
echo You need to build Spark before running this program.
exit /b 1
)

set LAUNCH_CLASSPATH=%SPARK_ASSEMBLY_JAR%
set LAUNCH_CLASSPATH=%SPARK_JARS_DIR%\*

rem Add the launcher build dir to the classpath if requested.
if not "x%SPARK_PREPEND_CLASSES%"=="x" (
set LAUNCH_CLASSPATH="%SPARK_HOME%\launcher\target\scala-%SPARK_SCALA_VERSION%\classes;%LAUNCH_CLASSPATH%"
)

set _SPARK_ASSEMBLY=%SPARK_ASSEMBLY_JAR%

rem Figure out where java is.
set RUNNER=java
if not "x%JAVA_HOME%"=="x" set RUNNER=%JAVA_HOME%\bin\java
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@

import java.io.BufferedReader;
import java.io.File;
import java.io.FileFilter;
import java.io.FileInputStream;
import java.io.InputStreamReader;
import java.io.IOException;
Expand Down Expand Up @@ -172,21 +171,13 @@ List<String> buildClassPath(String appClassPath) throws IOException {
addToClassPath(cp, String.format("%s/core/target/jars/*", sparkHome));
}

// We can't rely on the ENV_SPARK_ASSEMBLY variable to be set. Certain situations, such as
// when running unit tests, or user code that embeds Spark and creates a SparkContext
// with a local or local-cluster master, will cause this code to be called from an
// environment where that env variable is not guaranteed to exist.
//
// For the testing case, we rely on the test code to set and propagate the test classpath
// appropriately.
//
// For the user code case, we fall back to looking for the Spark assembly under SPARK_HOME.
// That duplicates some of the code in the shell scripts that look for the assembly, though.
String assembly = getenv(ENV_SPARK_ASSEMBLY);
if (assembly == null && !isTesting) {
assembly = findAssembly();
// Add Spark jars to the classpath. For the testing case, we rely on the test code to set and
// propagate the test classpath appropriately. For normal invocation, look for the jars
// directory under SPARK_HOME.
String jarsDir = findJarsDir(!isTesting);
if (jarsDir != null) {
addToClassPath(cp, join(File.separator, jarsDir, "*"));
}
addToClassPath(cp, assembly);

// Datanucleus jars must be included on the classpath. Datanucleus jars do not work if only
// included in the uber jar as plugin.xml metadata is lost. Both sbt and maven will populate
Expand Down Expand Up @@ -320,28 +311,25 @@ private Properties loadPropertiesFile() throws IOException {
return props;
}

private String findAssembly() {
private String findJarsDir(boolean failIfNotFound) {
// TODO: change to the correct directory once the assembly build is changed.
String sparkHome = getSparkHome();
File libdir;
if (new File(sparkHome, "RELEASE").isFile()) {
libdir = new File(sparkHome, "lib");
checkState(libdir.isDirectory(), "Library directory '%s' does not exist.",
libdir.getAbsolutePath());
checkState(!failIfNotFound || libdir.isDirectory(),
"Library directory '%s' does not exist.",
libdir.getAbsolutePath());
} else {
libdir = new File(sparkHome, String.format("assembly/target/scala-%s", getScalaVersion()));
}

final Pattern re = Pattern.compile("spark-assembly.*hadoop.*\\.jar");
FileFilter filter = new FileFilter() {
@Override
public boolean accept(File file) {
return file.isFile() && re.matcher(file.getName()).matches();
if (!libdir.isDirectory()) {
checkState(!failIfNotFound,
"Library directory '%s' does not exist; make sure Spark is built.",
libdir.getAbsolutePath());
libdir = null;
}
};
File[] assemblies = libdir.listFiles(filter);
checkState(assemblies != null && assemblies.length > 0, "No assemblies found in '%s'.", libdir);
checkState(assemblies.length == 1, "Multiple assemblies found in '%s'.", libdir);
return assemblies[0].getAbsolutePath();
}
return libdir != null ? libdir.getAbsolutePath() : null;
}

private String getConfDir() {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,6 @@ class CommandBuilderUtils {
static final String DEFAULT_MEM = "1g";
static final String DEFAULT_PROPERTIES_FILE = "spark-defaults.conf";
static final String ENV_SPARK_HOME = "SPARK_HOME";
static final String ENV_SPARK_ASSEMBLY = "_SPARK_ASSEMBLY";

/** The set of known JVM vendors. */
static enum JavaVendor {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -282,7 +282,6 @@ private boolean findInStringList(String list, String sep, String needle) {
private SparkSubmitCommandBuilder newCommandBuilder(List<String> args) {
SparkSubmitCommandBuilder builder = new SparkSubmitCommandBuilder(args);
builder.childEnv.put(CommandBuilderUtils.ENV_SPARK_HOME, System.getProperty("spark.test.home"));
builder.childEnv.put(CommandBuilderUtils.ENV_SPARK_ASSEMBLY, "dummy");
return builder;
}

Expand Down

0 comments on commit 3f145e9

Please sign in to comment.