Merge branch 'upmaster' into SPARK-36970
LuciferYang committed Oct 12, 2021
2 parents 8271b47 + 36b3bbc commit 0ad1959
Showing 364 changed files with 2,140 additions and 1,940 deletions.
3 changes: 1 addition & 2 deletions docs/index.md
@@ -39,8 +39,7 @@ source, visit [Building Spark](building-spark.html).

Spark runs on both Windows and UNIX-like systems (e.g. Linux, Mac OS), and it should run on any platform that runs a supported version of Java. This should include JVMs on x86_64 and ARM64. It's easy to run locally on one machine --- all you need is to have `java` installed on your system `PATH`, or the `JAVA_HOME` environment variable pointing to a Java installation.

-Spark runs on Java 8/11, Scala 2.12, Python 3.6+ and R 3.5+.
-Python 3.6 support is deprecated as of Spark 3.2.0.
+Spark runs on Java 8/11, Scala 2.12, Python 3.7+ and R 3.5+.
Java 8 prior to version 8u201 support is deprecated as of Spark 3.2.0.
For the Scala API, Spark {{site.SPARK_VERSION}}
uses Scala {{site.SCALA_BINARY_VERSION}}. You will need to use a compatible Scala version
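The `docs/index.md` text above notes that Spark runs locally as long as `java` is on the `PATH` or `JAVA_HOME` points at a supported JDK. As a minimal, hypothetical sketch (not taken from the commit; it assumes PySpark and a suitable JDK are installed), a local session can be started like this:

```python
# Minimal sketch: start a local Spark session, assuming `java` is on PATH
# (or JAVA_HOME points at a supported JDK) and the `pyspark` package is installed.
from pyspark.sql import SparkSession

spark = (
    SparkSession.builder
    .master("local[*]")           # run on all local cores, no cluster manager needed
    .appName("local-smoke-test")  # hypothetical application name
    .getOrCreate()
)

print(spark.version)              # confirms the JVM and Python sides are wired up
spark.stop()
```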
6 changes: 3 additions & 3 deletions docs/rdd-programming-guide.md
@@ -101,11 +101,11 @@ import org.apache.spark.SparkConf;

<div data-lang="python" markdown="1">

-Spark {{site.SPARK_VERSION}} works with Python 3.6+. It can use the standard CPython interpreter,
-so C libraries like NumPy can be used. It also works with PyPy 2.3+.
+Spark {{site.SPARK_VERSION}} works with Python 3.7+. It can use the standard CPython interpreter,
+so C libraries like NumPy can be used. It also works with PyPy 7.3.6+.

Python 2, 3.4 and 3.5 supports were removed in Spark 3.1.0.
-Python 3.6 support is deprecated in Spark 3.2.0.
+Python 3.6 support was removed in Spark 3.3.0.

Spark applications in Python can either be run with the `bin/spark-submit` script which includes Spark at runtime, or by including it in your setup.py as:

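The guide text above mentions launching Python applications with `bin/spark-submit` (the packaging snippet that follows it is not shown in this diff). As a hedged illustration, a minimal application that such a command could launch might look like the following; the file name `my_app.py` is hypothetical:

```python
# my_app.py -- hypothetical minimal PySpark application, runnable with
#   ./bin/spark-submit my_app.py
# assuming a Python 3.7+ interpreter, per the requirement updated above.
from pyspark.sql import SparkSession

spark = SparkSession.builder.appName("my-app").getOrCreate()

# Trivial job: count 100 generated rows to exercise the local or cluster backend.
count = spark.range(100).count()
print(f"row count = {count}")

spark.stop()
```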
13 changes: 13 additions & 0 deletions docs/running-on-yarn.md
@@ -441,6 +441,19 @@ To use a custom metrics.properties for the application master and executors, upd
</td>
<td>1.6.0</td>
</tr>
+<tr>
+  <td><code>spark.yarn.am.clientModeTreatDisconnectAsFailed</code></td>
+  <td>false</td>
+  <td>
+  Treat yarn-client unclean disconnects as failures. In yarn-client mode, the application normally always finishes
+  with a final status of SUCCESS, because in some cases it is not possible to know whether the application was terminated
+  intentionally by the user or whether there was a real error. This config changes that behavior: if the Application
+  Master disconnects from the driver uncleanly (i.e. without the proper shutdown handshake), the application will
+  terminate with a final status of FAILED, allowing the caller to decide whether it was truly a failure. Note that if
+  this config is set and the user terminates the client application abruptly, it may show a status of FAILED even though it did not really fail.
+  </td>
+  <td>3.3.0</td>
+</tr>
<tr>
<td><code>spark.yarn.am.clientModeExitOnError</code></td>
<td>false</td>
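The `spark.yarn.am.clientModeTreatDisconnectAsFailed` row added above is an ordinary boolean Spark property, so it can be set like any other configuration value. A hedged sketch (only the property name is taken from the table; the rest is illustrative) of enabling it for a yarn-client application:

```python
# Illustrative only: enable the new flag for a yarn-client application so an
# unclean Application Master disconnect surfaces as a FAILED final status.
from pyspark import SparkConf
from pyspark.sql import SparkSession

conf = (
    SparkConf()
    .set("spark.submit.deployMode", "client")  # yarn-client mode
    .set("spark.yarn.am.clientModeTreatDisconnectAsFailed", "true")
)

spark = (
    SparkSession.builder
    .master("yarn")
    .config(conf=conf)
    .getOrCreate()
)
```

In practice the same property is often passed on the command line instead, e.g. as a `--conf spark.yarn.am.clientModeTreatDisconnectAsFailed=true` argument to `spark-submit`.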
6 changes: 3 additions & 3 deletions pom.xml
@@ -2657,7 +2657,7 @@
<jvmArgs>
<jvmArg>-Xss128m</jvmArg>
<jvmArg>-Xms4g</jvmArg>
-<jvmArg>-Xmx6g</jvmArg>
+<jvmArg>-Xmx5g</jvmArg>
<jvmArg>-XX:MaxMetaspaceSize=2g</jvmArg>
<jvmArg>-XX:ReservedCodeCacheSize=${CodeCacheSize}</jvmArg>
</jvmArgs>
@@ -2707,7 +2707,7 @@
<include>**/*Suite.java</include>
</includes>
<reportsDirectory>${project.build.directory}/surefire-reports</reportsDirectory>
-<argLine>-ea -Xmx6g -Xss4m -XX:MaxMetaspaceSize=2g -XX:ReservedCodeCacheSize=${CodeCacheSize} -Dio.netty.tryReflectionSetAccessible=true</argLine>
+<argLine>-ea -Xmx5g -Xss4m -XX:MaxMetaspaceSize=2g -XX:ReservedCodeCacheSize=${CodeCacheSize} -Dio.netty.tryReflectionSetAccessible=true</argLine>
<environmentVariables>
<!--
Setting SPARK_DIST_CLASSPATH is a simple way to make sure any child processes
@@ -2758,7 +2758,7 @@
<reportsDirectory>${project.build.directory}/surefire-reports</reportsDirectory>
<junitxml>.</junitxml>
<filereports>SparkTestSuite.txt</filereports>
-<argLine>-ea -Xmx6g -Xss4m -XX:MaxMetaspaceSize=2g -XX:ReservedCodeCacheSize=${CodeCacheSize} -Dio.netty.tryReflectionSetAccessible=true</argLine>
+<argLine>-ea -Xmx5g -Xss4m -XX:MaxMetaspaceSize=2g -XX:ReservedCodeCacheSize=${CodeCacheSize} -Dio.netty.tryReflectionSetAccessible=true</argLine>
<stderr/>
<environmentVariables>
<!--
4 changes: 2 additions & 2 deletions project/SparkBuild.scala
@@ -1127,12 +1127,12 @@ object TestSettings {
// SPARK-29282 This is for consistency between JDK8 and JDK11.
(Test / javaOptions) ++= {
val metaspaceSize = sys.env.get("METASPACE_SIZE").getOrElse("1300m")
-s"-Xmx6g -Xss4m -XX:MaxMetaspaceSize=$metaspaceSize -XX:+UseParallelGC -XX:-UseDynamicNumberOfGCThreads -XX:ReservedCodeCacheSize=128m"
+s"-Xmx5g -Xss4m -XX:MaxMetaspaceSize=$metaspaceSize -XX:+UseParallelGC -XX:-UseDynamicNumberOfGCThreads -XX:ReservedCodeCacheSize=128m"
.split(" ").toSeq
},
javaOptions ++= {
val metaspaceSize = sys.env.get("METASPACE_SIZE").getOrElse("1300m")
-s"-Xmx6g -XX:MaxMetaspaceSize=$metaspaceSize".split(" ").toSeq
+s"-Xmx5g -XX:MaxMetaspaceSize=$metaspaceSize".split(" ").toSeq
},
(Test / javaOptions) ++= {
val jdwpEnabled = sys.props.getOrElse("test.jdwp.enabled", "false").toBoolean
4 changes: 2 additions & 2 deletions python/docs/source/development/contributing.rst
@@ -127,7 +127,7 @@ If you are using Conda, the development environment can be set as follows.

.. code-block:: bash
-# Python 3.6+ is required
+# Python 3.7+ is required
conda create --name pyspark-dev-env python=3.9
conda activate pyspark-dev-env
pip install -r dev/requirements.txt
@@ -143,7 +143,7 @@ Now, you can start developing and `running the tests <testing.rst>`_.
pip
~~~

-With Python 3.6+, pip can be used as below to install and set up the development environment.
+With Python 3.7+, pip can be used as below to install and set up the development environment.

.. code-block:: bash
2 changes: 1 addition & 1 deletion python/docs/source/getting_started/install.rst
@@ -30,7 +30,7 @@ and building from the source.
Python Version Supported
------------------------

-Python 3.6 and above.
+Python 3.7 and above.


Using PyPI
2 changes: 1 addition & 1 deletion python/pyspark/__init__.pyi
@@ -71,7 +71,7 @@ def since(version: Union[str, float]) -> Callable[[T], T]: ...
def copy_func(
f: F,
name: Optional[str] = ...,
-    sinceversion: Optional[str] = ...,
+    sinceversion: Optional[Union[str, float]] = ...,
doc: Optional[str] = ...,
) -> F: ...
def keyword_only(func: F) -> F: ...
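The stub change above widens `copy_func`'s `sinceversion` parameter to `Optional[Union[str, float]]`, matching the `since(version: Union[str, float])` signature shown in the same hunk. A hedged sketch (class and method names are made up) of why both forms matter — version numbers are passed both as bare floats and as strings:

```python
# Illustrative sketch: the `since` decorator from the stub above accepts
# either a float or a string version and records it in the docstring.
from pyspark import since


class ExampleApi:
    @since(3.2)            # float form
    def new_method(self):
        """Do something added in 3.2."""

    @since("3.2.0")        # string form
    def other_method(self):
        """Do something else added in 3.2.0."""
```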