diff --git a/assembly/pom.xml b/assembly/pom.xml
index f1f8b0d3682e2..53059eaa34faf 100644
--- a/assembly/pom.xml
+++ b/assembly/pom.xml
@@ -92,6 +92,27 @@
true
+
+
+ org.apache.maven.plugins
+ maven-antrun-plugin
+
+
+ package
+
+ run
+
+
+
+
+
+
+
+
+
+
+
+
org.apache.maven.plugins
@@ -196,6 +217,19 @@
maven-assembly-plugin
2.4
+
dist
package
@@ -208,7 +242,7 @@
-
+
diff --git a/make-distribution.sh b/make-distribution.sh
index c9a26d78239b2..738a9c4d69601 100755
--- a/make-distribution.sh
+++ b/make-distribution.sh
@@ -228,7 +228,6 @@ cp "$SPARK_HOME"/conf/*.template "$DISTDIR"/conf
cp "$SPARK_HOME/README.md" "$DISTDIR"
cp -r "$SPARK_HOME/bin" "$DISTDIR"
cp -r "$SPARK_HOME/python" "$DISTDIR"
-zip -r "$DISTDIR"/python/lib/pyspark.zip "$SPARK_HOME"/python/lib/pyspark
cp -r "$SPARK_HOME/sbin" "$DISTDIR"
cp -r "$SPARK_HOME/ec2" "$DISTDIR"
diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala
index 09b4976d10c26..3bd70dc0f6af1 100644
--- a/project/SparkBuild.scala
+++ b/project/SparkBuild.scala
@@ -361,12 +361,20 @@ object PySparkAssembly {
// to be included in the assembly. We can't just add "python/" to the assembly's resource dir
// list since that will copy unneeded / unwanted files.
resourceGenerators in Compile <+= resourceManaged in Compile map { outDir: File =>
+ val src = new File(BuildCommons.sparkHome, "python/pyspark")
+ // Rebuild python/lib/pyspark.zip; each entry name drops the path prefix up to src's parent.
+ val pysparkZip = new File(BuildCommons.sparkHome, "python/lib/pyspark.zip")
+ IO.delete(pysparkZip)
+ val prefixLength = src.getParent.length + 1
+ def walk(node: File): List[File] = {
+   val children = if (node.isDirectory) IO.listFiles(node).toList else Nil
+   node :: children.flatMap(walk)
+ }
+ IO.zip(walk(src).map(f => (f, f.getAbsolutePath.substring(prefixLength))), pysparkZip)
val dst = new File(outDir, "pyspark")
if (!dst.isDirectory()) {
require(dst.mkdirs())
}
-
- val src = new File(BuildCommons.sparkHome, "python/pyspark")
copy(src, dst)
}
)